podatki <- read.table("/cloud/project/Poglavje 5/Naloga 2/Priljubljenost.csv", header=TRUE, sep=";", dec=",")
head(podatki)
## Razred Osnovnošolec Priljubljenost Ekstravertiranost Spol
## 1 1 1 6.3 5 1
## 2 1 2 4.9 7 0
## 3 1 3 5.3 4 1
## 4 1 4 4.7 3 1
## 5 1 5 6.0 5 1
## 6 1 6 4.7 4 0
Opis spremenljivk:
podatki$SpolFaktor <- factor(podatki$Spol,
levels = c(0, 1),
labels = c("fant", "dekle"))
fit_OLS <- lm(Priljubljenost ~ Ekstravertiranost + SpolFaktor,
data = podatki)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:DescTools':
##
## Recode
vif(fit_OLS)
## Ekstravertiranost SpolFaktor
## 1.00803 1.00803
podatki$StdOst <- round(rstandard(fit_OLS), 3)
podatki$CooksD <- round(cooks.distance(fit_OLS), 3)
hist(podatki$StdOst,
main = "Histogram standardiziranih ostankov",
ylab = "Frekvenca",
xlab = "Standardizirani ostanki")
podatki$StdOcene <- scale(fit_OLS$fitted.values)
library(ggplot2)
ggplot(podatki, aes(y=StdOst, x=StdOcene)) +
theme_linedraw() +
geom_point() +
xlab("Standardizirane ocenjene vrednosti") +
ylab("Standardizirani ostanki")
head(podatki[order(podatki$StdOst), c("Razred", "Osnovnošolec", "StdOst")])
## Razred Osnovnošolec StdOst
## 1622 82 9 -3.951
## 627 31 18 -3.038
## 1632 82 19 -3.015
## 949 48 10 -2.934
## 1619 82 6 -2.929
## 1989 100 9 -2.929
head(podatki[order(-podatki$StdOst), c("Razred", "Osnovnošolec", "StdOst")])
## Razred Osnovnošolec StdOst
## 1666 84 14 2.831
## 1266 64 4 2.826
## 1273 64 12 2.826
## 1268 64 6 2.815
## 364 18 14 2.732
## 588 29 13 2.726
head(podatki[order(-podatki$CooksD), c("Razred", "Osnovnošolec", "CooksD")])
## Razred Osnovnošolec CooksD
## 1274 64 13 0.010
## 659 33 7 0.008
## 627 31 18 0.007
## 1268 64 6 0.007
## 588 29 13 0.006
## 656 33 4 0.006
podatkiC <- podatki[c(-1622, -627, -1632, -1274), ]
fit_OLS <- lm(Priljubljenost ~ Ekstravertiranost + SpolFaktor,
data = podatkiC)
summary(fit_OLS)
##
## Call:
## lm(formula = Priljubljenost ~ Ekstravertiranost + SpolFaktor,
## data = podatkiC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1635 -0.6694 -0.0487 0.7365 3.0454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.79289 0.10288 27.15 <2e-16 ***
## Ekstravertiranost 0.29412 0.01903 15.45 <2e-16 ***
## SpolFaktordekle 1.49113 0.04797 31.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.067 on 1993 degrees of freedom
## Multiple R-squared: 0.3952, Adjusted R-squared: 0.3946
## F-statistic: 651.2 on 2 and 1993 DF, p-value: < 2.2e-16
fit_ML <- glm(Priljubljenost ~ Ekstravertiranost + SpolFaktor,
data = podatkiC)
summary(fit_ML)
##
## Call:
## glm(formula = Priljubljenost ~ Ekstravertiranost + SpolFaktor,
## data = podatkiC)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1635 -0.6694 -0.0487 0.7365 3.0454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.79289 0.10288 27.15 <2e-16 ***
## Ekstravertiranost 0.29412 0.01903 15.45 <2e-16 ***
## SpolFaktordekle 1.49113 0.04797 31.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.138731)
##
## Null deviance: 3752.5 on 1995 degrees of freedom
## Residual deviance: 2269.5 on 1993 degrees of freedom
## AIC: 5928.7
##
## Number of Fisher Scoring iterations: 2
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## --------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## --------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename,
## summarise, summarize
## The following object is masked from 'package:ggpubr':
##
## mutate
Statistika <- summarySE(podatkiC,
measurevar="Priljubljenost",
groupvars=c("Razred"),
conf.interval=0.95)
head(Statistika)
## Razred N Priljubljenost sd se ci
## 1 1 20 5.075000 0.9535943 0.2132302 0.4462959
## 2 2 20 4.105000 0.9991970 0.2234273 0.4676386
## 3 3 18 4.694444 1.1649147 0.2745730 0.5792984
## 4 4 23 5.434783 0.8138621 0.1697020 0.3519404
## 5 5 21 5.285714 0.9509394 0.2075120 0.4328624
## 6 6 20 4.335000 0.9675117 0.2163422 0.4528094
library(ggplot2)
ggplot(Statistika, aes(x=Razred, y=Priljubljenost)) +
geom_bar(position=position_dodge(), stat="identity") +
geom_errorbar(aes(ymin=Priljubljenost-ci, ymax=Priljubljenost+ci),
width=0.1,
position=position_dodge(.9)) +
theme_linedraw()
library(lme4)
library(lmerTest)
hierar_fit0 <- lmer(Priljubljenost ~ (1 | Razred),
REML = FALSE,
data = podatkiC)
summary(hierar_fit0)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: Priljubljenost ~ (1 | Razred)
## Data: podatkiC
##
## AIC BIC logLik deviance df.resid
## 6302.5 6319.3 -3148.2 6296.5 1993
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2185 -0.6993 0.0004 0.6718 3.3314
##
## Random effects:
## Groups Name Variance Std.Dev.
## Razred (Intercept) 0.680 0.8246
## Residual 1.211 1.1005
## Number of obs: 1996, groups: Razred, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.0819 0.0861 99.7584 59.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hierar_fit1 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred),
REML = FALSE,
data = podatkiC)
summary(hierar_fit1)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
## Data: podatkiC
##
## AIC BIC logLik deviance df.resid
## 4907.4 4935.4 -2448.7 4897.4 1991
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.05053 -0.66397 -0.00815 0.67481 2.99750
##
## Random effects:
## Groups Name Variance Std.Dev.
## Razred (Intercept) 0.6081 0.7798
## Residual 0.5837 0.7640
## Number of obs: 1996, groups: Razred, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.14898 0.11621 401.82925 18.49 <2e-16
## Ekstravertiranost 0.44080 0.01608 1955.58073 27.41 <2e-16
## SpolFaktordekle 1.25043 0.03716 1924.95455 33.65 <2e-16
##
## (Intercept) ***
## Ekstravertiranost ***
## SpolFaktordekle ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Ekstrv
## Ekstrvrtrns -0.708
## SpolFktrdkl -0.101 -0.085
anova(hierar_fit0, hierar_fit1, test="Chi")
## Data: podatkiC
## Models:
## hierar_fit0: Priljubljenost ~ (1 | Razred)
## hierar_fit1: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hierar_fit0 3 6302.5 6319.3 -3148.2 6296.5
## hierar_fit1 5 4907.4 4935.4 -2448.7 4897.4 1399.1 2 < 2.2e-16
##
## hierar_fit0
## hierar_fit1 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hierar_fit2 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + Ekstravertiranost:SpolFaktor + (1 | Razred),
REML = FALSE,
data = podatkiC)
summary(hierar_fit2)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + Ekstravertiranost:SpolFaktor +
## (1 | Razred)
## Data: podatkiC
##
## AIC BIC logLik deviance df.resid
## 4908.7 4942.3 -2448.3 4896.7 1990
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0348 -0.6725 -0.0016 0.6817 3.0342
##
## Random effects:
## Groups Name Variance Std.Dev.
## Razred (Intercept) 0.6086 0.7801
## Residual 0.5834 0.7638
## Number of obs: 1996, groups: Razred, 100
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 2.21684 0.14075 755.83468
## Ekstravertiranost 0.42739 0.02248 1926.23227
## SpolFaktordekle 1.11999 0.15697 1920.96280
## Ekstravertiranost:SpolFaktordekle 0.02529 0.02957 1923.95786
## t value Pr(>|t|)
## (Intercept) 15.750 < 2e-16 ***
## Ekstravertiranost 19.016 < 2e-16 ***
## SpolFaktordekle 7.135 1.37e-12 ***
## Ekstravertiranost:SpolFaktordekle 0.855 0.393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Ekstrv SplFkt
## Ekstrvrtrns -0.812
## SpolFktrdkl -0.568 0.664
## Ekstrvrt:SF 0.564 -0.699 -0.972
hierar_fit3 <- lmer(Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost | Razred),
REML = FALSE,
data = podatkiC)
summary(hierar_fit3)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost |
## Razred)
## Data: podatkiC
##
## AIC BIC logLik deviance df.resid
## 4842.8 4882.0 -2414.4 4828.8 1989
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.05257 -0.65655 -0.00629 0.67289 3.09288
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Razred (Intercept) 2.42166 1.5562
## Ekstravertiranost 0.02672 0.1635 -0.94
## Residual 0.54917 0.7411
## Number of obs: 1996, groups: Razred, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.09146 0.17742 96.66425 11.79 <2e-16
## Ekstravertiranost 0.44264 0.02273 93.08654 19.47 <2e-16
## SpolFaktordekle 1.24310 0.03632 1909.17058 34.23 <2e-16
##
## (Intercept) ***
## Ekstravertiranost ***
## SpolFaktordekle ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Ekstrv
## Ekstrvrtrns -0.911
## SpolFktrdkl -0.062 -0.062
anova(hierar_fit1, hierar_fit3, test="Chi")
## Data: podatkiC
## Models:
## hierar_fit1: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 | Razred)
## hierar_fit3: Priljubljenost ~ Ekstravertiranost + SpolFaktor + (1 + Ekstravertiranost | Razred)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## hierar_fit1 5 4907.4 4935.4 -2448.7 4897.4
## hierar_fit3 7 4842.8 4882.0 -2414.4 4828.8 68.606 2 1.266e-15
##
## hierar_fit1
## hierar_fit3 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
podatkiC$Ocena_hierar_fit3 <- predict(hierar_fit3)
podatkiC[5, ]
## Razred Osnovnošolec Priljubljenost Ekstravertiranost Spol
## 5 1 5 6 5 1
## SpolFaktor StdOst CooksD StdOcene Ocena_hierar_fit3
## 5 dekle 0.225 0 0.785374 5.751353